{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# COMPSCI 389: Introduction to Machine Learning\n", "# Topic 3.0 Nearest Neighbor for Regression\n", "\n", "In this notebook we will create our first ML algorithms for regression.\n", "\n", "As an example, we will apply the Nearest Neighbor algorithm to the GPA data set." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First, here are the import statements that we use in this notebook:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd # For representing data sets\n", "from sklearn.base import BaseEstimator # For creating our nearest neighbor model\n", "import numpy as np # For representing arrays\n", "import timeit # For timing different function calls\n", "from sklearn.neighbors import KDTree # For efficient nearest-neighbor searches (more on this below!)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Next, let's load the GPA data set and display it as a reminder of what it contains." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | physics | \n", "biology | \n", "history | \n", "English | \n", "geography | \n", "literature | \n", "Portuguese | \n", "math | \n", "chemistry | \n", "gpa | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "622.60 | \n", "491.56 | \n", "439.93 | \n", "707.64 | \n", "663.65 | \n", "557.09 | \n", "711.37 | \n", "731.31 | \n", "509.80 | \n", "1.33333 | \n", "
1 | \n", "538.00 | \n", "490.58 | \n", "406.59 | \n", "529.05 | \n", "532.28 | \n", "447.23 | \n", "527.58 | \n", "379.14 | \n", "488.64 | \n", "2.98333 | \n", "
2 | \n", "455.18 | \n", "440.00 | \n", "570.86 | \n", "417.54 | \n", "453.53 | \n", "425.87 | \n", "475.63 | \n", "476.11 | \n", "407.15 | \n", "1.97333 | \n", "
3 | \n", "756.91 | \n", "679.62 | \n", "531.28 | \n", "583.63 | \n", "534.42 | \n", "521.40 | \n", "592.41 | \n", "783.76 | \n", "588.26 | \n", "2.53333 | \n", "
4 | \n", "584.54 | \n", "649.84 | \n", "637.43 | \n", "609.06 | \n", "670.46 | \n", "515.38 | \n", "572.52 | \n", "581.25 | \n", "529.04 | \n", "1.58667 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
43298 | \n", "519.55 | \n", "622.20 | \n", "660.90 | \n", "543.48 | \n", "643.05 | \n", "579.90 | \n", "584.80 | \n", "581.25 | \n", "573.92 | \n", "2.76333 | \n", "
43299 | \n", "816.39 | \n", "851.95 | \n", "732.39 | \n", "621.63 | \n", "810.68 | \n", "666.79 | \n", "705.22 | \n", "781.01 | \n", "831.76 | \n", "3.81667 | \n", "
43300 | \n", "798.75 | \n", "817.58 | \n", "731.98 | \n", "648.42 | \n", "751.30 | \n", "648.67 | \n", "662.05 | \n", "773.15 | \n", "835.25 | \n", "3.75000 | \n", "
43301 | \n", "527.66 | \n", "443.82 | \n", "545.88 | \n", "624.18 | \n", "420.25 | \n", "676.80 | \n", "583.41 | \n", "395.46 | \n", "509.80 | \n", "2.50000 | \n", "
43302 | \n", "512.56 | \n", "415.41 | \n", "517.36 | \n", "532.37 | \n", "592.30 | \n", "382.20 | \n", "538.35 | \n", "448.02 | \n", "496.39 | \n", "3.16667 | \n", "
43303 rows × 10 columns
\n", "\n", " | physics | \n", "biology | \n", "history | \n", "English | \n", "geography | \n", "literature | \n", "Portuguese | \n", "math | \n", "chemistry | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "622.60 | \n", "491.56 | \n", "439.93 | \n", "707.64 | \n", "663.65 | \n", "557.09 | \n", "711.37 | \n", "731.31 | \n", "509.80 | \n", "
1 | \n", "538.00 | \n", "490.58 | \n", "406.59 | \n", "529.05 | \n", "532.28 | \n", "447.23 | \n", "527.58 | \n", "379.14 | \n", "488.64 | \n", "
2 | \n", "455.18 | \n", "440.00 | \n", "570.86 | \n", "417.54 | \n", "453.53 | \n", "425.87 | \n", "475.63 | \n", "476.11 | \n", "407.15 | \n", "
3 | \n", "756.91 | \n", "679.62 | \n", "531.28 | \n", "583.63 | \n", "534.42 | \n", "521.40 | \n", "592.41 | \n", "783.76 | \n", "588.26 | \n", "
4 | \n", "584.54 | \n", "649.84 | \n", "637.43 | \n", "609.06 | \n", "670.46 | \n", "515.38 | \n", "572.52 | \n", "581.25 | \n", "529.04 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
43298 | \n", "519.55 | \n", "622.20 | \n", "660.90 | \n", "543.48 | \n", "643.05 | \n", "579.90 | \n", "584.80 | \n", "581.25 | \n", "573.92 | \n", "
43299 | \n", "816.39 | \n", "851.95 | \n", "732.39 | \n", "621.63 | \n", "810.68 | \n", "666.79 | \n", "705.22 | \n", "781.01 | \n", "831.76 | \n", "
43300 | \n", "798.75 | \n", "817.58 | \n", "731.98 | \n", "648.42 | \n", "751.30 | \n", "648.67 | \n", "662.05 | \n", "773.15 | \n", "835.25 | \n", "
43301 | \n", "527.66 | \n", "443.82 | \n", "545.88 | \n", "624.18 | \n", "420.25 | \n", "676.80 | \n", "583.41 | \n", "395.46 | \n", "509.80 | \n", "
43302 | \n", "512.56 | \n", "415.41 | \n", "517.36 | \n", "532.37 | \n", "592.30 | \n", "382.20 | \n", "538.35 | \n", "448.02 | \n", "496.39 | \n", "
43303 rows × 9 columns
\n", "